********************************************************************************
*********    5.  Merge all datasets                              ***************
********************************************************************************
// Build the region-year panel: start from the SIAB controls and attach the
// robot, trade, ICT and baseline-control datasets on the key (region, year).
// All paths are wrapped in double quotes so the commands still parse when the
// data global points to a directory whose name contains spaces.
use "${data}/SIAB_controls.dta", clear

// Rows without a region identifier cannot be matched on the merge key.
drop if region == .

// 1:1 merges on (region, year). nogenerate suppresses the _merge variable;
// no keep() option is given, so unmatched rows from both sides are retained.
merge 1:1 region year using "${data}/robots_per_kreis.dta", nogenerate
merge 1:1 region year using "${data}/trade_kreis.dta", nogenerate
merge 1:1 region year using "${data}/ICT_kreis.dta", nogenerate
merge 1:1 region year using "${data}/baseline_controls.dta", nogenerate

*** ICT: use 1995 values for 1994
// Within each region, sorted by year, the 1994 row takes ICT_k from the row
// that follows it. The extra condition on year[_n+1] makes sure that row
// really is 1995, so a region with a gap in its years is left untouched
// instead of silently receiving the value of some later year.
bysort region (year): replace ICT_k = ICT_k[_n+1] if year == 1994 & year[_n+1] == 1995

*** Label robot variable
label var robot_intens "Robots per 10k workers"

********************************************************************************
*********    6. Censor variables                               ***************
********************************************************************************

*** 6.1.1 SIAB: variables with other count variables
// Every SIAB_<stub> variable listed here has a companion SIAB_n_<stub>
// (presumably the number of underlying observations - confirm against the
// script that builds SIAB_controls). The value is blanked wherever the
// companion is below 20. A missing companion is not below 20 in Stata, so
// such rows are left unchanged.
local siab_stubs emp_foreign emp_female emp emp_manufacturing emp_above50 ///
	emp_lq emp_mq emp_hq emp_edu_no emp_edu_low emp_edu_high ///
	wage wage_manufacturing wage_lq wage_mq wage_hq

foreach stub of local siab_stubs {
	display "SIAB_`stub'"
	// report the number of affected cells, then censor them
	count if SIAB_n_`stub' < 20
	replace SIAB_`stub' = . if SIAB_n_`stub' < 20
}

*** 6.1.2 SIAB: variables which directly reflect their count
// These variables are compared against the threshold themselves: any value
// below 20 is replaced by missing. Already-missing values are not below 20
// in Stata and therefore stay as they are.
// Not part of the list (disabled by the original author):
// SIAB_unemp SIAB_unemp_lq SIAB_unemp_mq SIAB_unemp_hq
local siab_counts SIAB_foreign ///
	SIAB_main_task_1 SIAB_main_task_2 SIAB_main_task_3 ///
	SIAB_main_task_4 SIAB_main_task_5 ///
	SIAB_kids_no SIAB_kids_yes ///
	SIAB_n_lowskill SIAB_n_midskill SIAB_n_highskill

foreach cnt of local siab_counts {
	display "`cnt'"
	// report the number of affected cells, then censor them
	count if `cnt' < 20
	replace `cnt' = . if `cnt' < 20
}

*** 6.1.3 SIAB: Task composition
*** 6.2 SIAB-BHP base years (1984-1984)
// NOTE(review): the identical start and end year in the 6.2 header looks like
// a typo - confirm the intended base-year range.
// Check only, nothing is censored here. The original author recorded that
// both counts return zero, i.e. all cells have at least 20 observations.
//   SIAB_n_gwkomp : count behind the task-composition variables
//   base_emp      : base-year employment (head-counts, not ft-eq)
foreach chk in SIAB_n_gwkomp base_emp {
	count if `chk' < 20
}


// Censor base-year employment by sector: cells below 20 become missing.
// public is deliberately the last entry of the list.
local base_sectors manufacturing agriculture food_products consumer_goods ///
	industrial_goods capital_goods construction maintenance services public

foreach sec of local base_sectors {
	local cell base_employment_`sec'

	display "`sec'"
	count if `cell' < 20
	replace `cell' = . if `cell' < 20

	// Secondary suppression: wherever this sector is missing, the public
	// sector is blanked as an additional category, so that a small cell
	// cannot be calculated as the residual category. According to the
	// original author base_employment_public itself is always larger than
	// 20; on the public iteration this statement changes nothing.
	replace base_employment_public = . if `cell' == .
}

*** 6.3 Robots, Trade & ICT variables
// Check only, nothing is censored here. The original author recorded that
// all three counts return zero, i.e. all cells have at least 20 observations.
local aux_counts n_robots n_trade n_ICT

foreach cnt of local aux_counts {
	display "`cnt'"
	count if `cnt' < 20
}


// Write the merged and censored region-year sample, overwriting an existing
// file. The path is quoted so the command still parses when the data global
// points to a directory whose name contains spaces.
save "${data}/aggregated_sample.dta", replace

********************************************************************************
********************                END PART 2         *************************
********************************************************************************












